library(ggplot2) # Data visualization
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(corrplot)
## corrplot 0.92 loaded
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.2.3
library(scatterplot3d)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.2.3
df<- read.csv("C:/Users/HP/Desktop/padhai/DV/JCOMP/datasets/all_month.csv", header=TRUE, sep=',', stringsAsFactors = FALSE)
# View(df)
glimpse(df)
## Rows: 12,230
## Columns: 22
## $ time            <chr> "2023-03-22T18:28:38.650Z", "2023-03-22T18:20:12.680Z"…
## $ latitude        <dbl> 33.32250, 38.83567, 38.83733, 38.75783, 62.14720, 38.8…
## $ longitude       <dbl> -116.3650, -122.8068, -122.7985, -122.7313, -152.0990,…
## $ depth           <dbl> 5.950, 2.190, 2.100, 1.950, 101.500, 2.220, 29.080, 6.…
## $ mag             <dbl> 1.01, 1.15, 2.11, 0.79, 2.40, 0.72, 2.27, 1.12, 1.93, …
## $ magType         <chr> "ml", "md", "md", "md", "ml", "md", "ml", "ml", "md", …
## $ nst             <int> 22, 24, 38, 9, NA, 12, 34, 5, 30, NA, 31, 5, 12, 34, 1…
## $ gap             <dbl> 69, 52, 41, 137, NA, 60, 119, 116, 172, NA, 147, 212, …
## $ dmin            <dbl> 0.061090, 0.012510, 0.006460, 0.006300, NA, 0.016190, …
## $ rms             <dbl> 0.23, 0.02, 0.05, 0.03, 0.63, 0.02, 0.12, 0.03, 0.13, …
## $ net             <chr> "ci", "nc", "nc", "nc", "ak", "nc", "hv", "av", "hv", …
## $ id              <chr> "ci40188343", "nc73860690", "nc73860675", "nc73860670"…
## $ updated         <chr> "2023-03-22T18:32:12.443Z", "2023-03-22T18:21:47.143Z"…
## $ place           <chr> "8km N of Borrego Springs, CA", "7km WNW of Cobb, CA",…
## $ type            <chr> "earthquake", "earthquake", "earthquake", "earthquake"…
## $ horizontalError <dbl> 0.36, 0.21, 0.17, 0.51, NA, 0.32, 0.62, 0.56, 0.64, NA…
## $ depthError      <dbl> 0.830, 0.490, 0.300, 0.610, 0.900, 1.070, 0.960, 0.670…
## $ magError        <dbl> 0.14100000, 0.20000000, 0.15000000, 0.10000000, NA, 0.…
## $ magNst          <int> 14, 24, 37, 8, NA, 12, 5, 5, 7, NA, 3, 5, 12, 33, 12, …
## $ status          <chr> "automatic", "automatic", "automatic", "automatic", "a…
## $ locationSource  <chr> "ci", "nc", "nc", "nc", "ak", "nc", "hv", "av", "hv", …
## $ magSource       <chr> "ci", "nc", "nc", "nc", "ak", "nc", "hv", "av", "hv", …
df[is.na(df)] <- 0
df$date <- as.Date(substring(df$time,1,10),"%Y-%m-%d")
earthquakeCor <- df[,c("latitude","longitude","depth","mag", "nst","gap",
                       "dmin","rms","horizontalError","depthError","magError","magNst")]
correlations <- cor(earthquakeCor)
p <- corrplot(correlations, method="circle")

df %>% group_by(magNst) %>% 
  summarize(avg = mean(mag), min=min(mag), max=max(mag), events = length(mag)) %>% 
  ungroup() -> dfm
head(dfm)
## # A tibble: 6 × 5
##   magNst   avg   min   max events
##    <dbl> <dbl> <dbl> <dbl>  <int>
## 1      0 1.65  -0.45  5.4    3321
## 2      1 0.766 -0.5   4.2      51
## 3      2 1.23  -1.04  4.4     152
## 4      3 1.20  -1.33  4.5     365
## 5      4 1.05  -1     4.7    1444
## 6      5 1.13  -1.04  4.54   1009
glimpse(dfm)
## Rows: 181
## Columns: 5
## $ magNst <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ avg    <dbl> 1.6453418, 0.7656863, 1.2265132, 1.1999178, 1.0513019, 1.127879…
## $ min    <dbl> -0.45, -0.50, -1.04, -1.33, -1.00, -1.04, -0.90, -0.77, -0.88, …
## $ max    <dbl> 5.40, 4.20, 4.40, 4.50, 4.70, 4.54, 4.70, 5.40, 4.70, 4.70, 5.3…
## $ events <int> 3321, 51, 152, 365, 1444, 1009, 600, 502, 504, 382, 339, 281, 3…
tail(dfm)
## # A tibble: 6 × 5
##   magNst   avg   min   max events
##    <dbl> <dbl> <dbl> <dbl>  <int>
## 1    438   5     5     5        1
## 2    450   5     5     5        1
## 3    460   5.5   5.5   5.5      1
## 4    502   4.9   4.9   4.9      1
## 5    594   5.2   5.2   5.2      1
## 6    679   4.9   4.9   4.9      1
ggplot(df, aes(x=date, y=mag)) + geom_point() + geom_line(linetype="dotted",color="blue") + theme_bw() + 
  labs(title="Time series for all month US Dataset", x="Date", y="Magnitude")

dfmag <- df %>% filter(mag >= 5) 
ggplot(dfmag, aes(x=date, y=mag)) + geom_point() + geom_line(linetype="solid",color="blue") + theme_bw() + 
  labs(title="Time series for large (magnitude > 5)", 
       x="Date", y="Magnitude")

dat_ts <- ts(dfmag[,c("mag")])
ts_plot(dat_ts)
dat_ts <- ts(dfmag[,c("latitude")])
ts_plot(dat_ts)
dat_ts <- ts(dfmag[,c("longitude")])
ts_plot(dat_ts)
boxplot(df$mag, main="Boxplot for magnitude", ylab="Magnitude")

boxplot(df$depth, main="Boxplot for depth", ylab="Depth")

colors = c("red", "blue", "yellow", "green", "magenta",
                "lightblue", "grey", "lightgreen", "darkblue", "cyan")
boxplot(mag ~ magSource, data=df, col=colors, xlim=c(0.5,10), ylim=c(1,8), 
                        ylab="Magnitude", main="Magnitude, grouped by magnitude source")

colors = c("red", "blue", "yellow", "green", "magenta",
                "lightblue", "grey", "lightgreen", "darkblue", "cyan")
boxplot(mag ~ magType, data=df, col=colors, xlim=c(0.5,10), ylim=c(1,8),
                        ylab = "Magnitude", main="Magnitude grouped by magnitude type")

attach(df)
scatterplot3d(longitude,latitude,mag, pch=16, highlight.3d=TRUE,
              type="h",
              main="Earthquakes \nMagnitude vs. position",
              xlab="Longitude",
              ylab="Latitude",
              zlab="Magnitude [Richter]")

dfmag$date <- as.integer(strftime(dfmag$time, "%d"))
dfmag$date
##   [1] 22 22 22 22 22 22 21 21 21 21 21 21 20 19 19 18 18 18 17 17 17 17 16 16 16
##  [26] 16 16 16 16 15 15 14 14 14 14 14 13 13 13 13 12 12 12 11 11 11 11 10 10 10
##  [51] 10 10  9  9  8  8  8  8  7  7  7  7  7  7  7  7  6  6  6  6  5  5  5  5  4
##  [76]  4  4  4  3  3  3  3  3  3  3  3  2  2  2  2  2  2  2  2  2  2  1  1  1  1
## [101]  1  1 28 28 28 28 28 28 28 27 27 27 27 27 27 26 26 26 26 25 25 25 25 25 25
## [126] 25 24 23 23 23 23 23 23 23 22 22 21 21 21 21 21 21
# create column indicating point color
dfmag$pcolor <- "darkgreen" #all other than Bucharest or US
dfmag$pcolor[dfmag$locationSource=="buc"] <- "red"
dfmag$pcolor[dfmag$locationSource=="us"] <- "blue"
with(dfmag, {
s3d <- scatterplot3d(date, depth, mag,        # x y and z axis
     color=pcolor, pch=19,        # circle color indicates location Source
     type="h", lty.hplot=2,       # lines to the horizontal plane
     scale.y=.95,                 # scale y axis (reduce by 25%)
     main="Major earthquakes (magnitude > 5)",
     xlab="Date",
     ylab="Depth [Km]",
     zlab="Magnitude [Richter]")
s3d.coords <- s3d$xyz.convert(date, depth, mag)
text(s3d.coords$x, s3d.coords$y,     # x and y coordinates
     labels=place,       # text to plot
     pos=4, cex=.5)                  # shrink text 50% and place on right side of points)
# add the legend
legend("topleft", inset=.05,      # location and inset
       bty="n", cex=.75,              # suppress legend box, shrink text 50%
       title="Location source",
       c("gcmt", "US", "other"), fill=c("red", "blue", "darkgreen"))
})

with(dfmag, {
  s3d <- scatterplot3d(date, depth, mag,        # x y and z axis
                       color=pcolor, pch=19,        # circle color indicates location Source
                       type="h", lty.hplot=2,       # lines to the horizontal plane
                       scale.y=.75,                 # scale y axis (reduce by 25%)
                       main="Major earthquakes (magnitude > 5)",
                       xlab="Date",
                       ylab="Depth [Km]",
                       zlab="Magnitude [Richter]")
  s3d.coords <- s3d$xyz.convert(date, depth, mag)
  text(s3d.coords$x, s3d.coords$y,     # x and y coordinates
       labels=magType,       # text to plot
       pos=4, cex=.5)                  # shrink text 50% and place on right side of points)
  # add the legend
  legend("topleft", inset=.05,      # location and inset
         bty="n", cex=.75,              # suppress legend box, shrink text 50%
         title="Location source",
         c("gcmt", "US", "other"), fill=c("red", "blue", "darkgreen"))
})